import jieba
import re
from gensim.models import Word2Vec
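
# Optional (a hedged sketch): if the corpus contains domain-specific terms that jieba
# splits apart, a user dictionary can be loaded before segmentation. The path below is
# a hypothetical placeholder, not part of the original script.
# jieba.load_userdict(r"C:\Users\zlsjJSSA\Desktop\训练文本\userdict.txt")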

# Read the text file and tokenize it line by line
with open(r"C:\Users\zlsjJSSA\Desktop\训练文本\综合.txt", 'r', encoding='utf-8') as f:
    lines = []
    for line in f:
        # Tokenize each line with jieba
        temp = jieba.lcut(line.strip())  # precise mode; strip leading/trailing whitespace first

        # Keep only tokens that consist entirely of Chinese characters and are at least 2 characters long
        words = [i for i in temp if re.match(r'^[\u4e00-\u9fa5]+$', i) and len(i) >= 2]
        if words:
            lines.append(words)

# Print the first 5 tokenized lines as a preview
print(lines[:5])

# Train the Word2Vec model
# Parameters: vector_size: embedding dimension; window: context window size; min_count: minimum word frequency;
# epochs: number of training passes; sg: 1 for Skip-gram, 0 for CBOW
model = Word2Vec(sentences=lines, vector_size=100, window=5, min_count=5, epochs=30, sg=1)
print("模型训练完成。")

# Save the model in word2vec text format
model_path = r"C:\Users\zlsjJSSA\Desktop\word2vec_model.txt"  # replace with your actual path

try:
    model.wv.save_word2vec_format(model_path, binary=False)
    print(f"模型已保存到 {model_path}")
except Exception as e:
    print(f"保存模型时发生错误: {e}")
